In [1]:
# !pip install git+https://github.com/alberanid/imdbpy
# !pip install pandas
# !pip install numpy
# !pip install matplotlib
# !pip install seaborn
# !pip install pandas_profiling --upgrade
# !pip install plotly
# !pip install wordcloud
# !pip install Flask
In [2]:
# Import Dataset
# Import File from Loacal Drive
# from google.colab import files
# data_to_load = files.upload()
# from google.colab import drive
# drive.mount('/content/drive')
In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
import collections
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from plotly.offline import iplot, init_notebook_mode
from wordcloud import WordCloud, STOPWORDS
from pandas_profiling import ProfileReport
%matplotlib inline
warnings.filterwarnings("ignore")
In [4]:
# path = '/content/drive/MyDrive/Files/'

path = 'C:\\Users\\pawan\\OneDrive\\Desktop\\ott\\Data\\'
 
df_tvshows = pd.read_csv(path + 'otttvshows.csv')
 
df_tvshows.head()
Out[4]:
ID Title Year Age IMDb Rotten Tomatoes Directors Cast Genres Country Language Plotline Runtime Kind Seasons Netflix Hulu Prime Video Disney+ Type
0 1 Snowpiercer 2013 18+ 6.9 94% NaN Daveed Diggs,Iddo Goldberg,Mickey Sumner,Aliso... Action,Drama,Sci-Fi,Thriller United States English Set seven years after the world has become a f... 60.0 tv series 3.0 1 0 0 0 1
1 2 Philadelphia 1993 13+ 8.8 80% NaN Charlie Day,Glenn Howerton,Rob McElhenney,Kait... Comedy United States English The gang, 5 raging alcoholic, narcissists run ... 22.0 tv series 18.0 1 0 0 0 1
2 3 Roma 2018 18+ 8.7 93% NaN Kevin McKidd,Ray Stevenson,Polly Walker,Kerry ... Action,Drama,History,Romance,War United Kingdom,United States English In this British historical drama, the turbulen... 52.0 tv series 2.0 1 0 0 0 1
3 4 Amy 2015 18+ 7.0 87% NaN Amy Brenneman,Richard T. Jones,Jessica Tuck,Ma... Drama United States English A family drama focused on three generations of... 60.0 tv series 6.0 1 0 1 1 1
4 5 The Young Offenders 2016 NaN 8.0 100% NaN Alex Murphy,Chris Walley,Hilary Rose,Dominic M... Comedy United Kingdom,Ireland English NaN 30.0 tv series 3.0 1 0 0 0 1
In [5]:
# profile = ProfileReport(df_tvshows)
# profile
In [6]:
def data_investigate(df):
    print('No of Rows : ', df.shape[0])
    print('No of Coloums : ', df.shape[1])
    print('**'*25)
    print('Colums Names : \n', df.columns)
    print('**'*25)
    print('Datatype of Columns : \n', df.dtypes)
    print('**'*25)
    print('Missing Values : ')
    c = df.isnull().sum()
    c = c[c > 0]
    print(c)
    print('**'*25)
    print('Missing vaules %age wise :\n')
    print((100*(df.isnull().sum()/len(df.index))))
    print('**'*25)
    print('Pictorial Representation : ')
    plt.figure(figsize = (10, 10))
    sns.heatmap(df.isnull(), yticklabels = False, cbar = False)
    plt.show()
In [7]:
data_investigate(df_tvshows)
No of Rows :  5432
No of Coloums :  20
**************************************************
Colums Names : 
 Index(['ID', 'Title', 'Year', 'Age', 'IMDb', 'Rotten Tomatoes', 'Directors',
       'Cast', 'Genres', 'Country', 'Language', 'Plotline', 'Runtime', 'Kind',
       'Seasons', 'Netflix', 'Hulu', 'Prime Video', 'Disney+', 'Type'],
      dtype='object')
**************************************************
Datatype of Columns : 
 ID                   int64
Title               object
Year                 int64
Age                 object
IMDb               float64
Rotten Tomatoes     object
Directors           object
Cast                object
Genres              object
Country             object
Language            object
Plotline            object
Runtime            float64
Kind                object
Seasons            float64
Netflix              int64
Hulu                 int64
Prime Video          int64
Disney+              int64
Type                 int64
dtype: object
**************************************************
Missing Values : 
Age                1954
IMDb                556
Rotten Tomatoes    4194
Directors          5158
Cast                486
Genres              323
Country             549
Language            638
Plotline           2493
Runtime            1410
Seasons             679
dtype: int64
**************************************************
Missing vaules %age wise :

ID                  0.000000
Title               0.000000
Year                0.000000
Age                35.972018
IMDb               10.235641
Rotten Tomatoes    77.209131
Directors          94.955817
Cast                8.946981
Genres              5.946244
Country            10.106775
Language           11.745214
Plotline           45.894698
Runtime            25.957290
Kind                0.000000
Seasons            12.500000
Netflix             0.000000
Hulu                0.000000
Prime Video         0.000000
Disney+             0.000000
Type                0.000000
dtype: float64
**************************************************
Pictorial Representation : 
In [8]:
# ID
# df_tvshows = df_tvshows.drop(['ID'], axis = 1)
 
# Age
df_tvshows.loc[df_tvshows['Age'].isnull() & df_tvshows['Disney+'] == 1, "Age"] = '13'
# df_tvshows.fillna({'Age' : 18}, inplace = True)
df_tvshows.fillna({'Age' : 'NR'}, inplace = True)
df_tvshows['Age'].replace({'all': '0'}, inplace = True)
df_tvshows['Age'].replace({'7+': '7'}, inplace = True)
df_tvshows['Age'].replace({'13+': '13'}, inplace = True)
df_tvshows['Age'].replace({'16+': '16'}, inplace = True)
df_tvshows['Age'].replace({'18+': '18'}, inplace = True)
# df_tvshows['Age'] = df_tvshows['Age'].astype(int)
 
# IMDb
# df_tvshows.fillna({'IMDb' : df_tvshows['IMDb'].mean()}, inplace = True)
# df_tvshows.fillna({'IMDb' : df_tvshows['IMDb'].median()}, inplace = True)
df_tvshows.fillna({'IMDb' : "NA"}, inplace = True)
 
# Rotten Tomatoes
df_tvshows['Rotten Tomatoes'] = df_tvshows['Rotten Tomatoes'][df_tvshows['Rotten Tomatoes'].notnull()].str.replace('%', '').astype(int)
# df_tvshows['Rotten Tomatoes'] = df_tvshows['Rotten Tomatoes'][df_tvshows['Rotten Tomatoes'].notnull()].astype(int)
# df_tvshows.fillna({'Rotten Tomatoes' : df_tvshows['Rotten Tomatoes'].mean()}, inplace = True)
# df_tvshows.fillna({'Rotten Tomatoes' : df_tvshows['Rotten Tomatoes'].median()}, inplace = True)
# df_tvshows['Rotten Tomatoes'] = df_tvshows['Rotten Tomatoes'].astype(int)
df_tvshows.fillna({'Rotten Tomatoes' : "NA"}, inplace = True)
 
# Directors
# df_tvshows = df_tvshows.drop(['Directors'], axis = 1)
df_tvshows.fillna({'Directors' : "NA"}, inplace = True)
 
# Cast
df_tvshows.fillna({'Cast' : "NA"}, inplace = True)
 
# Genres
df_tvshows.fillna({'Genres': "NA"}, inplace = True)
 
# Country
df_tvshows.fillna({'Country': "NA"}, inplace = True)
 
# Language
df_tvshows.fillna({'Language': "NA"}, inplace = True)
 
# Plotline
df_tvshows.fillna({'Plotline': "NA"}, inplace = True)
 
# Runtime
# df_tvshows.fillna({'Runtime' : df_tvshows['Runtime'].mean()}, inplace = True)
# df_tvshows['Runtime'] = df_tvshows['Runtime'].astype(int)
df_tvshows.fillna({'Runtime' : "NA"}, inplace = True)
 
# Kind
# df_tvshows.fillna({'Kind': "NA"}, inplace = True)
 
# Type
# df_tvshows.fillna({'Type': "NA"}, inplace = True)
# df_tvshows = df_tvshows.drop(['Type'], axis = 1)
 
# Seasons
# df_tvshows.fillna({'Seasons': 1}, inplace = True)
df_tvshows.fillna({'Seasons': "NA"}, inplace = True)
# df_tvshows = df_tvshows.drop(['Seasons'], axis = 1)
# df_tvshows['Seasons'] = df_tvshows['Seasons'].astype(int)
# df_tvshows.fillna({'Seasons' : df_tvshows['Seasons'].mean()}, inplace = True)
# df_tvshows['Seasons'] = df_tvshows['Seasons'].astype(int)
 
# Service Provider
df_tvshows['Service Provider'] = df_tvshows.loc[:, ['Netflix', 'Prime Video', 'Disney+', 'Hulu']].idxmax(axis = 1)
# df_tvshows.drop(['Netflix','Prime Video','Disney+','Hulu'], axis = 1)

# Removing Duplicate and Missing Entries
df_tvshows.dropna(how = 'any', inplace = True)
df_tvshows.drop_duplicates(inplace = True)
In [9]:
data_investigate(df_tvshows)
No of Rows :  5432
No of Coloums :  21
**************************************************
Colums Names : 
 Index(['ID', 'Title', 'Year', 'Age', 'IMDb', 'Rotten Tomatoes', 'Directors',
       'Cast', 'Genres', 'Country', 'Language', 'Plotline', 'Runtime', 'Kind',
       'Seasons', 'Netflix', 'Hulu', 'Prime Video', 'Disney+', 'Type',
       'Service Provider'],
      dtype='object')
**************************************************
Datatype of Columns : 
 ID                   int64
Title               object
Year                 int64
Age                 object
IMDb                object
Rotten Tomatoes     object
Directors           object
Cast                object
Genres              object
Country             object
Language            object
Plotline            object
Runtime             object
Kind                object
Seasons             object
Netflix              int64
Hulu                 int64
Prime Video          int64
Disney+              int64
Type                 int64
Service Provider    object
dtype: object
**************************************************
Missing Values : 
Series([], dtype: int64)
**************************************************
Missing vaules %age wise :

ID                  0.0
Title               0.0
Year                0.0
Age                 0.0
IMDb                0.0
Rotten Tomatoes     0.0
Directors           0.0
Cast                0.0
Genres              0.0
Country             0.0
Language            0.0
Plotline            0.0
Runtime             0.0
Kind                0.0
Seasons             0.0
Netflix             0.0
Hulu                0.0
Prime Video         0.0
Disney+             0.0
Type                0.0
Service Provider    0.0
dtype: float64
**************************************************
Pictorial Representation : 
In [10]:
df_tvshows.head()
Out[10]:
ID Title Year Age IMDb Rotten Tomatoes Directors Cast Genres Country ... Plotline Runtime Kind Seasons Netflix Hulu Prime Video Disney+ Type Service Provider
0 1 Snowpiercer 2013 18 6.9 94 NA Daveed Diggs,Iddo Goldberg,Mickey Sumner,Aliso... Action,Drama,Sci-Fi,Thriller United States ... Set seven years after the world has become a f... 60 tv series 3 1 0 0 0 1 Netflix
1 2 Philadelphia 1993 13 8.8 80 NA Charlie Day,Glenn Howerton,Rob McElhenney,Kait... Comedy United States ... The gang, 5 raging alcoholic, narcissists run ... 22 tv series 18 1 0 0 0 1 Netflix
2 3 Roma 2018 18 8.7 93 NA Kevin McKidd,Ray Stevenson,Polly Walker,Kerry ... Action,Drama,History,Romance,War United Kingdom,United States ... In this British historical drama, the turbulen... 52 tv series 2 1 0 0 0 1 Netflix
3 4 Amy 2015 18 7 87 NA Amy Brenneman,Richard T. Jones,Jessica Tuck,Ma... Drama United States ... A family drama focused on three generations of... 60 tv series 6 1 0 1 1 1 Netflix
4 5 The Young Offenders 2016 NR 8 100 NA Alex Murphy,Chris Walley,Hilary Rose,Dominic M... Comedy United Kingdom,Ireland ... NA 30 tv series 3 1 0 0 0 1 Netflix

5 rows × 21 columns

In [11]:
df_tvshows.describe()
Out[11]:
ID Year Netflix Hulu Prime Video Disney+ Type
count 5432.000000 5432.000000 5432.000000 5432.000000 5432.000000 5432.000000 5432.0
mean 2716.500000 2010.668446 0.341311 0.293999 0.403351 0.033689 1.0
std 1568.227662 11.726176 0.474193 0.455633 0.490615 0.180445 0.0
min 1.000000 1901.000000 0.000000 0.000000 0.000000 0.000000 1.0
25% 1358.750000 2009.000000 0.000000 0.000000 0.000000 0.000000 1.0
50% 2716.500000 2014.000000 0.000000 0.000000 0.000000 0.000000 1.0
75% 4074.250000 2017.000000 1.000000 1.000000 1.000000 0.000000 1.0
max 5432.000000 2020.000000 1.000000 1.000000 1.000000 1.000000 1.0
In [12]:
df_tvshows.corr()
Out[12]:
ID Year Netflix Hulu Prime Video Disney+ Type
ID 1.000000 -0.031346 -0.646330 0.034293 0.441264 0.195409 NaN
Year -0.031346 1.000000 0.222316 -0.065807 -0.198675 -0.022741 NaN
Netflix -0.646330 0.222316 1.000000 -0.366515 -0.515086 -0.119344 NaN
Hulu 0.034293 -0.065807 -0.366515 1.000000 -0.377374 -0.075701 NaN
Prime Video 0.441264 -0.198675 -0.515086 -0.377374 1.000000 -0.151442 NaN
Disney+ 0.195409 -0.022741 -0.119344 -0.075701 -0.151442 1.000000 NaN
Type NaN NaN NaN NaN NaN NaN NaN
In [13]:
# df_tvshows.sort_values('Year', ascending = True)
# df_tvshows.sort_values('Seasons', ascending = False)
In [14]:
# df_tvshows.to_csv(path_or_buf= '/content/drive/MyDrive/Files/updated_otttvshows.csv', index = False)
 
# path = '/content/drive/MyDrive/Files/'
 
# udf_tvshows = pd.read_csv(path + 'updated_otttvshows.csv')
 
# udf_tvshows
In [15]:
# df_netflix_tvshows = df_tvshows.loc[(df_tvshows['Netflix'] > 0)]
# df_hulu_tvshows = df_tvshows.loc[(df_tvshows['Hulu'] > 0)]
# df_prime_video_tvshows = df_tvshows.loc[(df_tvshows['Prime Video'] > 0)]
# df_disney_tvshows = df_tvshows.loc[(df_tvshows['Disney+'] > 0)]
In [16]:
df_netflix_only_tvshows = df_tvshows[(df_tvshows['Netflix'] == 1) & (df_tvshows['Hulu'] == 0) & (df_tvshows['Prime Video'] == 0 ) & (df_tvshows['Disney+'] == 0)]
df_hulu_only_tvshows = df_tvshows[(df_tvshows['Netflix'] == 0) & (df_tvshows['Hulu'] == 1) & (df_tvshows['Prime Video'] == 0 ) & (df_tvshows['Disney+'] == 0)]
df_prime_video_only_tvshows = df_tvshows[(df_tvshows['Netflix'] == 0) & (df_tvshows['Hulu'] == 0) & (df_tvshows['Prime Video'] == 1 ) & (df_tvshows['Disney+'] == 0)]
df_disney_only_tvshows = df_tvshows[(df_tvshows['Netflix'] == 0) & (df_tvshows['Hulu'] == 0) & (df_tvshows['Prime Video'] == 0 ) & (df_tvshows['Disney+'] == 1)]
In [17]:
df_tvshows_season = df_tvshows.copy()
In [18]:
df_tvshows_season.loc[df_tvshows_season['Seasons'] == "NA"] = 1
df_tvshows_season.drop(df_tvshows_season.loc[df_tvshows_season['Seasons'] == "NA"].index, inplace = True)
# df_tvshows_season = df_tvshows_season[df_tvshows_season.Seasons != "NA"]
df_tvshows_season['Seasons'] = df_tvshows_season['Seasons'].astype(int)
In [19]:
# Creating distinct dataframes only with the tvshows present on individual streaming platforms
netflix_season_tvshows = df_tvshows_season.loc[df_tvshows_season['Netflix'] == 1]
hulu_season_tvshows = df_tvshows_season.loc[df_tvshows_season['Hulu'] == 1]
prime_video_season_tvshows = df_tvshows_season.loc[df_tvshows_season['Prime Video'] == 1]
disney_season_tvshows = df_tvshows_season.loc[df_tvshows_season['Disney+'] == 1]
In [20]:
df_tvshows_season_group = df_tvshows_season.copy()
In [21]:
plt.figure(figsize = (10, 10))
corr = df_tvshows_season.corr()
# Plot figsize
fig, ax = plt.subplots(figsize=(10, 8))
# Generate Heat Map, allow annotations and place floats in map
sns.heatmap(corr, cmap = 'magma', annot = True, fmt = ".2f")
# Apply xticks
plt.xticks(range(len(corr.columns)), corr.columns);
# Apply yticks
plt.yticks(range(len(corr.columns)), corr.columns)
# show plot
plt.show()
fig.show()
<Figure size 720x720 with 0 Axes>
In [22]:
df_season_high_tvshows = df_tvshows_season.sort_values(by = 'Seasons', ascending = False).reset_index()
df_season_high_tvshows = df_season_high_tvshows.drop(['index'], axis = 1)
# filter = (df_tvshows_season['Seasons'] == (df_tvshows_season['Seasons'].max()))
# df_season_high_tvshows = df_tvshows_season[filter]
 
# highest_rated_tvshows = df_tvshows_season.loc[df_tvshows_season['Seasons'].idxmax()]
 
print('\nTV Shows with Highest Ever Seasons  are : \n')
df_season_high_tvshows.head(5)
TV Shows with Highest Ever Seasons  are : 

Out[22]:
ID Title Year Age IMDb Rotten Tomatoes Directors Cast Genres Country ... Plotline Runtime Kind Seasons Netflix Hulu Prime Video Disney+ Type Service Provider
0 723 Case Closed 1996 7 8.4 NA NA Minami Takayama,Wakana Yamazaki,Hoang Khuyet,A... Animation,Action,Adventure,Comedy,Crime,Drama,... Japan,Italy,United Kingdom,Mexico,Spain ... NA 25 tv series 53 1 1 0 0 1 Netflix
1 2481 Sesame Street 1969 0 8.1 NA NA Caroll Spinney,Frank Oz,Jerry Nelson,Sonia Man... Animation,Adventure,Comedy,Family,Fantasy,Music United States ... NA 55 tv series 51 0 1 1 0 1 Prime Video
2 2142 Bottersnikes & Gumbles 2016 7 5.2 NA NA Ray Chase,Robbie Daymond,Tara Sands,Alex Babic... Animation,Adventure,Comedy,Family United States,United Kingdom,Australia ... When unemployed dockworker Joey Coyle finds $1... 11 tv series 51 1 0 0 0 1 Netflix
3 2747 Chopped 2009 0 7.5 NA NA Ted Allen,Amanda Freitag,Alex Guarnaschelli,Ma... Reality-TV United States ... Follows James Isaac "Jimmy" Neutron (Debi Derr... 60 tv series 49 0 1 0 0 1 Hulu
4 869 NOVA 1974 16 8.7 NA NA Jay O. Sanders,Craig Sechler,Lance Lewman,Will... Documentary United States ... A police investigator named Arnar travels from... 60 tv series 48 1 0 1 0 1 Netflix

5 rows × 21 columns

In [23]:
fig = px.bar(y = df_season_high_tvshows['Title'][:15],
             x = df_season_high_tvshows['Seasons'][:15], 
             color = df_season_high_tvshows['Seasons'][:15],
             color_continuous_scale = 'Teal_r', 
             labels = { 'y' : 'TV Shows', 'x' : 'Seasons : In Minutes'},
             title  = 'TV Shows with Highest Seasons in Minutes : All Platforms')

fig.update_layout(plot_bgcolor = 'white')
fig.show()
In [24]:
df_season_low_tvshows = df_tvshows_season.sort_values(by = 'Seasons', ascending = True).reset_index()
df_season_low_tvshows = df_season_low_tvshows.drop(['index'], axis = 1)
# filter = (df_tvshows_season['Seasons'] == (df_tvshows_season['Seasons'].min()))
# df_season_low_tvshows = df_tvshows_season[filter]

print('\nTV Shows with Lowest Ever Seasons  are : \n')
df_season_low_tvshows.head(5)
TV Shows with Lowest Ever Seasons  are : 

Out[24]:
ID Title Year Age IMDb Rotten Tomatoes Directors Cast Genres Country ... Plotline Runtime Kind Seasons Netflix Hulu Prime Video Disney+ Type Service Provider
0 5432 United States of Animals 2016 13 6.3 NA NA Quincy Dunn-Baker,Jo Mendenhall,Dale Anderson,... NA United States ... NA 22 tv series 1 0 0 0 1 1 Disney+
1 1 1 1 1 1 1 1 1 1 1 ... 1 1 1 1 1 1 1 1 1 1
2 1 1 1 1 1 1 1 1 1 1 ... 1 1 1 1 1 1 1 1 1 1
3 1 1 1 1 1 1 1 1 1 1 ... 1 1 1 1 1 1 1 1 1 1
4 2208 Wild Alaska 2014 NR 7.9 NA NA Denny Heide,Harvey Kitka,Dougray Scott Documentary United Kingdom ... NA NA tv series 1 1 0 0 0 1 Netflix

5 rows × 21 columns

In [25]:
fig = px.bar(y = df_season_low_tvshows['Title'][:15],
             x = df_season_low_tvshows['Seasons'][:15], 
             color = df_season_low_tvshows['Seasons'][:15],
             color_continuous_scale = 'Teal_r', 
             labels = { 'y' : 'TV Shows', 'x' : 'Seasons : In Minutes'},
             title  = 'TV Shows with Lowest Seasons in Minutes : All Platforms')

fig.update_layout(plot_bgcolor = 'white')
fig.show()
In [26]:
print(f'''
      Total '{df_tvshows_season['Seasons'].unique().shape[0]}' unique Seasons s were Given, They were Like this,\n
      
{df_tvshows_season.sort_values(by = 'Seasons', ascending = False)['Seasons'].unique()}\n
 
      The Highest Ever Seasons Ever Any TV Show Got is '{df_season_high_tvshows['Title'][0]}' : '{df_season_high_tvshows['Seasons'].max()}'\n
 
      The Lowest Ever Seasons Ever Any TV Show Got is '{df_season_low_tvshows['Title'][0]}' : '{df_season_low_tvshows['Seasons'].min()}'\n
      ''')
      Total '48' unique Seasons s were Given, They were Like this,

      
[53 51 49 48 46 43 42 41 40 39 38 37 36 35 34 33 32 31 30 29 28 27 26 25
 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1]

 
      The Highest Ever Seasons Ever Any TV Show Got is 'Case Closed' : '53'

 
      The Lowest Ever Seasons Ever Any TV Show Got is 'United States of Animals' : '1'

      
In [27]:
netflix_season_high_tvshows = df_season_high_tvshows.loc[df_season_high_tvshows['Netflix']==1].reset_index()
netflix_season_high_tvshows = netflix_season_high_tvshows.drop(['index'], axis = 1)
 
netflix_season_low_tvshows = df_season_low_tvshows.loc[df_season_low_tvshows['Netflix']==1].reset_index()
netflix_season_low_tvshows = netflix_season_low_tvshows.drop(['index'], axis = 1)
 
netflix_season_high_tvshows.head(5)
Out[27]:
ID Title Year Age IMDb Rotten Tomatoes Directors Cast Genres Country ... Plotline Runtime Kind Seasons Netflix Hulu Prime Video Disney+ Type Service Provider
0 723 Case Closed 1996 7 8.4 NA NA Minami Takayama,Wakana Yamazaki,Hoang Khuyet,A... Animation,Action,Adventure,Comedy,Crime,Drama,... Japan,Italy,United Kingdom,Mexico,Spain ... NA 25 tv series 53 1 1 0 0 1 Netflix
1 2142 Bottersnikes & Gumbles 2016 7 5.2 NA NA Ray Chase,Robbie Daymond,Tara Sands,Alex Babic... Animation,Adventure,Comedy,Family United States,United Kingdom,Australia ... When unemployed dockworker Joey Coyle finds $1... 11 tv series 51 1 0 0 0 1 Netflix
2 869 NOVA 1974 16 8.7 NA NA Jay O. Sanders,Craig Sechler,Lance Lewman,Will... Documentary United States ... A police investigator named Arnar travels from... 60 tv series 48 1 0 1 0 1 Netflix
3 1214 American Masters 1986 16 8 NA NA Martin Scorsese,Peter Bogdanovich,Tony Bennett... Documentary,Biography United States ... NA 90 tv series 35 1 0 0 0 1 Netflix
4 996 America's Funniest Home Videos 1989 0 6.2 NA NA Jess Harnell,Tom Bergeron,Bob Saget,Ernie Ande... Comedy,Family,Reality-TV United States ... NA 30 tv series 31 1 1 0 1 1 Netflix

5 rows × 21 columns

In [28]:
fig = px.bar(y = netflix_season_high_tvshows['Title'][:15],
             x = netflix_season_high_tvshows['Seasons'][:15], 
             color = netflix_season_high_tvshows['Seasons'][:15],
             color_continuous_scale = 'Teal_r', 
             labels = { 'y' : 'TV Shows', 'x' : 'Seasons : In Minutes'},
             title  = 'TV Shows with Highest Seasons in Minutes : Netflix')

fig.update_layout(plot_bgcolor = 'white')
fig.show()
In [29]:
fig = px.bar(y = netflix_season_low_tvshows['Title'][:15],
             x = netflix_season_low_tvshows['Seasons'][:15], 
             color = netflix_season_low_tvshows['Seasons'][:15],
             color_continuous_scale = 'Teal_r', 
             labels = { 'y' : 'TV Shows', 'x' : 'Seasons : In Minutes'},
             title  = 'TV Shows with Lowest Seasons in Minutes : Netflix')

fig.update_layout(plot_bgcolor = 'white')
fig.show()
In [30]:
hulu_season_high_tvshows = df_season_high_tvshows.loc[df_season_high_tvshows['Hulu']==1].reset_index()
hulu_season_high_tvshows = hulu_season_high_tvshows.drop(['index'], axis = 1)
 
hulu_season_low_tvshows = df_season_low_tvshows.loc[df_season_low_tvshows['Hulu']==1].reset_index()
hulu_season_low_tvshows = hulu_season_low_tvshows.drop(['index'], axis = 1)
 
hulu_season_high_tvshows.head(5)
Out[30]:
ID Title Year Age IMDb Rotten Tomatoes Directors Cast Genres Country ... Plotline Runtime Kind Seasons Netflix Hulu Prime Video Disney+ Type Service Provider
0 723 Case Closed 1996 7 8.4 NA NA Minami Takayama,Wakana Yamazaki,Hoang Khuyet,A... Animation,Action,Adventure,Comedy,Crime,Drama,... Japan,Italy,United Kingdom,Mexico,Spain ... NA 25 tv series 53 1 1 0 0 1 Netflix
1 2481 Sesame Street 1969 0 8.1 NA NA Caroll Spinney,Frank Oz,Jerry Nelson,Sonia Man... Animation,Adventure,Comedy,Family,Fantasy,Music United States ... NA 55 tv series 51 0 1 1 0 1 Prime Video
2 2747 Chopped 2009 0 7.5 NA NA Ted Allen,Amanda Freitag,Alex Guarnaschelli,Ma... Reality-TV United States ... Follows James Isaac "Jimmy" Neutron (Debi Derr... 60 tv series 49 0 1 0 0 1 Hulu
3 2401 Saturday Night Live 1975 16 8.1 NA NA The Saturday Night Live Band,Don Pardo,Darrell... Comedy,Music United States ... Dre Johnson (Anthony Anderson) has it all: a g... 90 tv series 46 0 1 0 0 1 Hulu
4 3357 Inside the NFL 1977 7 7 NA NA Phil Simms,Cris Collinsworth,Warren Sapp,James... News,Sport,Talk-Show United States ... In 2001 intern Chandra Levy vanishes. Police s... 60 tv series 43 0 1 0 0 1 Hulu

5 rows × 21 columns

In [31]:
fig = px.bar(y = hulu_season_high_tvshows['Title'][:15],
             x = hulu_season_high_tvshows['Seasons'][:15], 
             color = hulu_season_high_tvshows['Seasons'][:15],
             color_continuous_scale = 'Teal_r', 
             labels = { 'y' : 'TV Shows', 'x' : 'Seasons : In Minutes'},
             title  = 'TV Shows with Highest Seasons in Minutes : Hulu')

fig.update_layout(plot_bgcolor = 'white')
fig.show()
In [32]:
fig = px.bar(y = hulu_season_low_tvshows['Title'][:15],
             x = hulu_season_low_tvshows['Seasons'][:15], 
             color = hulu_season_low_tvshows['Seasons'][:15],
             color_continuous_scale = 'Teal_r', 
             labels = { 'y' : 'TV Shows', 'x' : 'Seasons : In Minutes'},
             title  = 'TV Shows with Lowest Seasons in Minutes : Hulu')

fig.update_layout(plot_bgcolor = 'white')
fig.show()
In [33]:
prime_video_season_high_tvshows = df_season_high_tvshows.loc[df_season_high_tvshows['Prime Video']==1].reset_index()
prime_video_season_high_tvshows = prime_video_season_high_tvshows.drop(['index'], axis = 1)
 
prime_video_season_low_tvshows = df_season_low_tvshows.loc[df_season_low_tvshows['Prime Video']==1].reset_index()
prime_video_season_low_tvshows = prime_video_season_low_tvshows.drop(['index'], axis = 1)
 
prime_video_season_high_tvshows.head(5)
Out[33]:
ID Title Year Age IMDb Rotten Tomatoes Directors Cast Genres Country ... Plotline Runtime Kind Seasons Netflix Hulu Prime Video Disney+ Type Service Provider
0 2481 Sesame Street 1969 0 8.1 NA NA Caroll Spinney,Frank Oz,Jerry Nelson,Sonia Man... Animation,Adventure,Comedy,Family,Fantasy,Music United States ... NA 55 tv series 51 0 1 1 0 1 Prime Video
1 869 NOVA 1974 16 8.7 NA NA Jay O. Sanders,Craig Sechler,Lance Lewman,Will... Documentary United States ... A police investigator named Arnar travels from... 60 tv series 48 1 0 1 0 1 Netflix
2 3586 Videofashion! News 2006 NR 5.6 NA NA Malan Breton,Ingrid Graham,Kelly Hughes,Adrian... Documentary,News United States ... NA NA tv series 42 0 1 1 0 1 Prime Video
3 2402 Survivor 2000 16 7.2 NA NA Jeff Probst,Rob Mariano,Parvati Shallow,Sandra... Adventure,Game-Show,Reality-TV United States ... Tandy Bowen and Tyrone Johnson, two teenagers ... 60 tv series 41 0 1 1 0 1 Prime Video
4 4590 Fashion News Live 2004 NR 6.6 NA NA Rocco Leo Gaglioti,J. Alexander,Nigel Barker,C... News United States ... Christmas Through the Decades explores the tra... NA tv series 40 0 0 1 0 1 Prime Video

5 rows × 21 columns

In [34]:
fig = px.bar(y = prime_video_season_high_tvshows['Title'][:15],
             x = prime_video_season_high_tvshows['Seasons'][:15], 
             color = prime_video_season_high_tvshows['Seasons'][:15],
             color_continuous_scale = 'Teal_r', 
             labels = { 'y' : 'TV Shows', 'x' : 'Seasons : In Minutes'},
             title  = 'TV Shows with Highest Seasons in Minutes : Prime Video')

fig.update_layout(plot_bgcolor = 'white')
fig.show()
In [35]:
fig = px.bar(y = prime_video_season_low_tvshows['Title'][:15],
             x = prime_video_season_low_tvshows['Seasons'][:15], 
             color = prime_video_season_low_tvshows['Seasons'][:15],
             color_continuous_scale = 'Teal_r', 
             labels = { 'y' : 'TV Shows', 'x' : 'Seasons : In Minutes'},
             title  = 'TV Shows with Lowest Seasons in Minutes : Prime Video')

fig.update_layout(plot_bgcolor = 'white')
fig.show()
In [36]:
disney_season_high_tvshows = df_season_high_tvshows.loc[df_season_high_tvshows['Disney+']==1].reset_index()
disney_season_high_tvshows = disney_season_high_tvshows.drop(['index'], axis = 1)
 
disney_season_low_tvshows = df_season_low_tvshows.loc[df_season_low_tvshows['Disney+']==1].reset_index()
disney_season_low_tvshows = disney_season_low_tvshows.drop(['index'], axis = 1)
 
disney_season_high_tvshows.head(5)
Out[36]:
ID Title Year Age IMDb Rotten Tomatoes Directors Cast Genres Country ... Plotline Runtime Kind Seasons Netflix Hulu Prime Video Disney+ Type Service Provider
0 2262 The Simpsons 1989 16 8.6 85 NA Dan Castellaneta,Nancy Cartwright,Harry Sheare... Animation,Comedy United States ... The curious, adventure-seeking, fourth grade g... 22 tv series 34 0 1 0 1 1 Disney+
1 996 America's Funniest Home Videos 1989 0 6.2 NA NA Jess Harnell,Tom Bergeron,Bob Saget,Ernie Ande... Comedy,Family,Reality-TV United States ... NA 30 tv series 31 1 1 0 1 1 Netflix
2 5338 Little Einsteins 2005 0 6.2 NA NA David Jason,Aiden Pompey,Jesse Schwartz,Natali... Animation,Adventure,Comedy,Family,Music,Musica... United States ... The adventures of best friends CeCe Jones and ... NA tv series 20 0 0 0 1 1 Disney+
3 2768 The Incredible Dr. Pol 2011 7 8.6 NA NA Jan Pol,Ari Rubin,Diane Pol,Charles Pol,Brenda... Documentary,Reality-TV United States ... Allen Walker, a young Exorcist with a cursed e... 44 tv series 18 0 1 0 1 1 Disney+
4 2715 Life Below Zero 2013 7 8 NA NA James Franzo,Sue Aikens,Agnes Hailstone,Chip H... Documentary,Adventure,Reality-TV United States ... Bobby Donnell is the head of a struggling Bost... 44 tv series 14 0 1 0 1 1 Disney+

5 rows × 21 columns

In [37]:
fig = px.bar(y = disney_season_high_tvshows['Title'][:15],
             x = disney_season_high_tvshows['Seasons'][:15], 
             color = disney_season_high_tvshows['Seasons'][:15],
             color_continuous_scale = 'Teal_r', 
             labels = { 'y' : 'TV Shows', 'x' : 'Seasons : In Minutes'},
             title  = 'TV Shows with Highest Seasons in Minutes : Disney+')

fig.update_layout(plot_bgcolor = 'white')
fig.show()
In [38]:
fig = px.bar(y = disney_season_low_tvshows['Title'][:15],
             x = disney_season_low_tvshows['Seasons'][:15], 
             color = disney_season_low_tvshows['Seasons'][:15],
             color_continuous_scale = 'Teal_r', 
             labels = { 'y' : 'TV Shows', 'x' : 'Seasons : In Minutes'},
             title  = 'TV Shows with Lowest Seasons in Minutes : Disney+')

fig.update_layout(plot_bgcolor = 'white')
fig.show()
In [39]:
print(f'''
      The TV Show with Highest Seasons  Ever Got is '{df_season_high_tvshows['Title'][0]}' : '{df_season_high_tvshows['Seasons'].max()}'\n
      The TV Show with Lowest Seasons  Ever Got is '{df_season_low_tvshows['Title'][0]}' : '{df_season_low_tvshows['Seasons'].min()}'\n
      
      The TV Show with Highest Seasons  on 'Netflix' is '{netflix_season_high_tvshows['Title'][0]}' : '{netflix_season_high_tvshows['Seasons'].max()}'\n
      The TV Show with Lowest Seasons  on 'Netflix' is '{netflix_season_low_tvshows['Title'][0]}' : '{netflix_season_low_tvshows['Seasons'].min()}'\n
      
      The TV Show with Highest Seasons  on 'Hulu' is '{hulu_season_high_tvshows['Title'][0]}' : '{hulu_season_high_tvshows['Seasons'].max()}'\n
      The TV Show with Lowest Seasons  on 'Hulu' is '{hulu_season_low_tvshows['Title'][0]}' : '{hulu_season_low_tvshows['Seasons'].min()}'\n
      
      The TV Show with Highest Seasons  on 'Prime Video' is '{prime_video_season_high_tvshows['Title'][0]}' : '{prime_video_season_high_tvshows['Seasons'].max()}'\n
      The TV Show with Lowest Seasons  on 'Prime Video' is '{prime_video_season_low_tvshows['Title'][0]}' : '{prime_video_season_low_tvshows['Seasons'].min()}'\n
      
      The TV Show with Highest Seasons  on 'Disney+' is '{disney_season_high_tvshows['Title'][0]}' : '{disney_season_high_tvshows['Seasons'].max()}'\n
      The TV Show with Lowest Seasons  on 'Disney+' is '{disney_season_low_tvshows['Title'][0]}' : '{disney_season_low_tvshows['Seasons'].min()}'\n 
      ''')
      The TV Show with Highest Seasons  Ever Got is 'Case Closed' : '53'

      The TV Show with Lowest Seasons  Ever Got is 'United States of Animals' : '1'

      
      The TV Show with Highest Seasons  on 'Netflix' is 'Case Closed' : '53'

      The TV Show with Lowest Seasons  on 'Netflix' is '1' : '1'

      
      The TV Show with Highest Seasons  on 'Hulu' is 'Case Closed' : '53'

      The TV Show with Lowest Seasons  on 'Hulu' is '1' : '1'

      
      The TV Show with Highest Seasons  on 'Prime Video' is 'Sesame Street' : '51'

      The TV Show with Lowest Seasons  on 'Prime Video' is '1' : '1'

      
      The TV Show with Highest Seasons  on 'Disney+' is 'The Simpsons' : '34'

      The TV Show with Lowest Seasons  on 'Disney+' is 'United States of Animals' : '1'
 
      
In [40]:
print(f'''
      Accross All Platforms the Average Seasons  is '{round(df_tvshows_season['Seasons'].mean(), ndigits = 2)}'\n
      The Average Seasons  on 'Netflix' is '{round(netflix_season_tvshows['Seasons'].mean(), ndigits = 2)}'\n
      The Average Seasons  on 'Hulu' is '{round(hulu_season_tvshows['Seasons'].mean(), ndigits = 2)}'\n
      The Average Seasons  on 'Prime Video' is '{round(prime_video_season_tvshows['Seasons'].mean(), ndigits = 2)}'\n
      The Average Seasons  on 'Disney+' is '{round(disney_season_tvshows['Seasons'].mean(), ndigits = 2)}'\n 
      ''')
      Accross All Platforms the Average Seasons  is '3.01'

      The Average Seasons  on 'Netflix' is '2.3'

      The Average Seasons  on 'Hulu' is '3.54'

      The Average Seasons  on 'Prime Video' is '2.56'

      The Average Seasons  on 'Disney+' is '1.45'
 
      
In [41]:
f, ax = plt.subplots(1, 2 , figsize = (20, 5))
sns.distplot(df_tvshows_season['Seasons'],bins = 20, kde = True, ax = ax[0])
sns.boxplot(df_tvshows_season['Seasons'], ax = ax[1])
plt.show()
In [42]:
# Defining plot size and title
plt.figure(figsize = (20, 5))
plt.title('Seasons s Per Platform')
 
# Plotting the information from each dataset into a histogram
sns.histplot(prime_video_season_tvshows['Seasons'][:100], color = 'lightblue', legend = True, kde = True)
sns.histplot(netflix_season_tvshows['Seasons'][:100], color = 'red', legend = True, kde = True)
sns.histplot(hulu_season_tvshows['Seasons'][:100], color = 'lightgreen', legend = True, kde = True)
sns.histplot(disney_season_tvshows['Seasons'][:100], color = 'darkblue', legend = True, kde = True) 
 
# Setting the legend
plt.legend(['Prime Video', 'Netflix', 'Hulu', 'Disney+'])
plt.show()
In [43]:
def round_val(data):
    if str(data) != 'nan':
        return round(data)
In [44]:
df_tvshows_season_group['Seasons Group'] = df_tvshows_season['Seasons'].apply(round_val)
 
season_values = df_tvshows_season_group['Seasons Group'].value_counts().sort_index(ascending = False).tolist()
season_index = df_tvshows_season_group['Seasons Group'].value_counts().sort_index(ascending = False).index
 
# season_values, season_index
In [45]:
season_group_count = df_tvshows_season_group.groupby('Seasons Group')['Title'].count()
season_group_tvshows = df_tvshows_season_group.groupby('Seasons Group')[['Netflix', 'Hulu', 'Prime Video', 'Disney+']].sum()
season_group_data_tvshows = pd.concat([season_group_count, season_group_tvshows], axis = 1).reset_index().rename(columns = {'Title' : 'TV Shows Count'})
season_group_data_tvshows = season_group_data_tvshows.sort_values(by = 'TV Shows Count', ascending = False)
In [46]:
# Seasons Group with TV Shows Counts - All Platforms Combined
season_group_data_tvshows.sort_values(by = 'TV Shows Count', ascending = False)
Out[46]:
Seasons Group TV Shows Count Netflix Hulu Prime Video Disney+
0 1 2720 1491 1129 1483 728
1 2 849 338 252 268 40
2 3 542 204 159 188 34
3 4 364 131 129 114 20
4 5 243 92 96 84 5
5 6 172 45 81 72 5
6 7 119 41 66 41 2
7 8 97 29 49 28 1
8 9 67 14 33 22 4
9 10 39 8 23 14 0
10 11 37 9 24 12 0
11 12 27 4 15 16 0
12 13 22 3 15 9 0
13 14 17 2 12 5 1
14 15 17 6 10 3 0
17 18 15 3 8 4 1
15 16 8 4 5 2 0
20 21 8 2 4 2 0
16 17 7 2 7 0 0
19 20 7 1 2 4 1
18 19 6 0 5 2 0
21 22 5 1 4 0 0
24 25 4 1 3 2 0
23 24 4 1 4 2 0
26 27 4 1 3 0 0
30 31 3 1 2 2 1
29 30 3 0 3 0 0
22 23 2 1 1 2 0
46 51 2 1 1 1 0
31 32 2 0 2 1 0
32 33 2 0 0 2 0
25 26 2 0 2 0 0
38 39 1 0 0 1 0
27 28 1 0 1 0 0
28 29 1 0 1 0 0
33 34 1 0 1 0 1
34 35 1 1 0 0 0
35 36 1 0 1 0 0
37 38 1 0 1 0 0
42 43 1 0 1 0 0
36 37 1 0 1 0 0
40 41 1 0 1 1 0
41 42 1 0 1 1 0
43 46 1 0 1 0 0
44 48 1 1 0 1 0
45 49 1 0 1 0 0
39 40 1 0 0 1 0
47 53 1 1 1 0 0
In [47]:
season_group_data_tvshows.sort_values(by = 'Seasons Group', ascending = False)
Out[47]:
Seasons Group TV Shows Count Netflix Hulu Prime Video Disney+
47 53 1 1 1 0 0
46 51 2 1 1 1 0
45 49 1 0 1 0 0
44 48 1 1 0 1 0
43 46 1 0 1 0 0
42 43 1 0 1 0 0
41 42 1 0 1 1 0
40 41 1 0 1 1 0
39 40 1 0 0 1 0
38 39 1 0 0 1 0
37 38 1 0 1 0 0
36 37 1 0 1 0 0
35 36 1 0 1 0 0
34 35 1 1 0 0 0
33 34 1 0 1 0 1
32 33 2 0 0 2 0
31 32 2 0 2 1 0
30 31 3 1 2 2 1
29 30 3 0 3 0 0
28 29 1 0 1 0 0
27 28 1 0 1 0 0
26 27 4 1 3 0 0
25 26 2 0 2 0 0
24 25 4 1 3 2 0
23 24 4 1 4 2 0
22 23 2 1 1 2 0
21 22 5 1 4 0 0
20 21 8 2 4 2 0
19 20 7 1 2 4 1
18 19 6 0 5 2 0
17 18 15 3 8 4 1
16 17 7 2 7 0 0
15 16 8 4 5 2 0
14 15 17 6 10 3 0
13 14 17 2 12 5 1
12 13 22 3 15 9 0
11 12 27 4 15 16 0
10 11 37 9 24 12 0
9 10 39 8 23 14 0
8 9 67 14 33 22 4
7 8 97 29 49 28 1
6 7 119 41 66 41 2
5 6 172 45 81 72 5
4 5 243 92 96 84 5
3 4 364 131 129 114 20
2 3 542 204 159 188 34
1 2 849 338 252 268 40
0 1 2720 1491 1129 1483 728
In [48]:
fig = px.bar(y = season_group_data_tvshows['TV Shows Count'],
             x = season_group_data_tvshows['Seasons Group'], 
             color = season_group_data_tvshows['Seasons Group'],
             color_continuous_scale = 'Teal_r', 
             labels = { 'y' : 'TV Shows Count', 'x' : 'Seasons : In Minutes'},
             title  = 'TV Shows with Group Seasons in Minutes : All Platforms')

fig.update_layout(plot_bgcolor = "white")
fig.show()
In [49]:
fig = px.pie(season_group_data_tvshows[:10],
             names = season_group_data_tvshows['Seasons Group'][:10],
             values = season_group_data_tvshows['TV Shows Count'][:10],
             color = season_group_data_tvshows['TV Shows Count'][:10],
             color_discrete_sequence = px.colors.sequential.Teal)

fig.update_traces(textinfo = 'percent+label',
                  title = 'TV Shows Count based on Seasons Group')
fig.show()
In [50]:
df_season_group_high_tvshows = season_group_data_tvshows.sort_values(by = 'TV Shows Count', ascending = False).reset_index()
df_season_group_high_tvshows = df_season_group_high_tvshows.drop(['index'], axis = 1)
# filter = (season_group_data_tvshows['TV Shows Count'] ==  (season_group_data_tvshows['TV Shows Count'].max()))
# df_season_group_high_tvshows = season_group_data_tvshows[filter]
 
# highest_rated_tvshows = season_group_data_tvshows.loc[season_group_data_tvshows['TV Shows Count'].idxmax()]
 
# print('\nSeasons with Highest Ever TV Shows Count are : All Platforms Combined\n')
df_season_group_high_tvshows.head(5)
Out[50]:
Seasons Group TV Shows Count Netflix Hulu Prime Video Disney+
0 1 2720 1491 1129 1483 728
1 2 849 338 252 268 40
2 3 542 204 159 188 34
3 4 364 131 129 114 20
4 5 243 92 96 84 5
In [51]:
df_season_group_low_tvshows = season_group_data_tvshows.sort_values(by = 'TV Shows Count', ascending = True).reset_index()
df_season_group_low_tvshows = df_season_group_low_tvshows.drop(['index'], axis = 1)
# filter = (season_group_data_tvshows['TV Shows Count'] = =  (season_group_data_tvshows['TV Shows Count'].min()))
# df_season_group_low_tvshows = season_group_data_tvshows[filter]
 
# print('\nSeasons with Lowest Ever TV Shows Count are : All Platforms Combined\n')
df_season_group_low_tvshows.head(5)
Out[51]:
Seasons Group TV Shows Count Netflix Hulu Prime Video Disney+
0 53 1 1 1 0 0
1 40 1 0 0 1 0
2 49 1 0 1 0 0
3 48 1 1 0 1 0
4 43 1 0 1 0 0
In [52]:
print(f'''
      Total '{df_tvshows_season['Seasons'].count()}' Titles are available on All Platforms, out of which\n
      You Can Choose to see TV Shows from Total '{season_group_data_tvshows['Seasons Group'].unique().shape[0]}' Seasons Group, They were Like this, \n
 
      {season_group_data_tvshows.sort_values(by = 'TV Shows Count', ascending = False)['Seasons Group'].unique()} etc. \n
 
      The Seasons Group with Highest TV Shows Count have '{season_group_data_tvshows['TV Shows Count'].max()}' TV Shows Available is '{df_season_group_high_tvshows['Seasons Group'][0]}', &\n
      The Seasons Group with Lowest TV Shows Count have '{season_group_data_tvshows['TV Shows Count'].min()}' TV Shows Available is '{df_season_group_low_tvshows['Seasons Group'][0]}'
      ''')
      Total '5432' Titles are available on All Platforms, out of which

      You Can Choose to see TV Shows from Total '48' Seasons Group, They were Like this, 

 
      [ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 18 16 21 17 20 19 22 25 24
 27 31 30 23 51 32 33 26 39 28 29 34 35 36 38 43 37 41 42 46 48 49 40 53] etc. 

 
      The Seasons Group with Highest TV Shows Count have '2720' TV Shows Available is '1', &

      The Seasons Group with Lowest TV Shows Count have '1' TV Shows Available is '53'
      
In [53]:
netflix_season_group_tvshows = season_group_data_tvshows[season_group_data_tvshows['Netflix'] !=  0].sort_values(by = 'Netflix', ascending = False).reset_index()
netflix_season_group_tvshows = netflix_season_group_tvshows.drop(['index', 'Hulu', 'Prime Video', 'Disney+', 'TV Shows Count'], axis = 1)
 
netflix_season_group_high_tvshows = df_season_group_high_tvshows.sort_values(by = 'Netflix', ascending = False).reset_index()
netflix_season_group_high_tvshows = netflix_season_group_high_tvshows.drop(['index'], axis = 1)
 
netflix_season_group_low_tvshows = df_season_group_high_tvshows.sort_values(by = 'Netflix', ascending = True).reset_index()
netflix_season_group_low_tvshows = netflix_season_group_low_tvshows.drop(['index'], axis = 1)
 
netflix_season_group_high_tvshows.head(5)
Out[53]:
Seasons Group TV Shows Count Netflix Hulu Prime Video Disney+
0 1 2720 1491 1129 1483 728
1 2 849 338 252 268 40
2 3 542 204 159 188 34
3 4 364 131 129 114 20
4 5 243 92 96 84 5
In [54]:
hulu_season_group_tvshows = season_group_data_tvshows[season_group_data_tvshows['Hulu'] !=  0].sort_values(by = 'Hulu', ascending = False).reset_index()
hulu_season_group_tvshows = hulu_season_group_tvshows.drop(['index', 'Netflix', 'Prime Video', 'Disney+', 'TV Shows Count'], axis = 1)
 
hulu_season_group_high_tvshows = df_season_group_high_tvshows.sort_values(by = 'Hulu', ascending = False).reset_index()
hulu_season_group_high_tvshows = hulu_season_group_high_tvshows.drop(['index'], axis = 1)
 
hulu_season_group_low_tvshows = df_season_group_high_tvshows.sort_values(by = 'Hulu', ascending = True).reset_index()
hulu_season_group_low_tvshows = hulu_season_group_low_tvshows.drop(['index'], axis = 1)
 
hulu_season_group_high_tvshows.head(5)
Out[54]:
Seasons Group TV Shows Count Netflix Hulu Prime Video Disney+
0 1 2720 1491 1129 1483 728
1 2 849 338 252 268 40
2 3 542 204 159 188 34
3 4 364 131 129 114 20
4 5 243 92 96 84 5
In [55]:
prime_video_season_group_tvshows = season_group_data_tvshows[season_group_data_tvshows['Prime Video'] !=  0].sort_values(by = 'Prime Video', ascending = False).reset_index()
prime_video_season_group_tvshows = prime_video_season_group_tvshows.drop(['index', 'Netflix', 'Hulu', 'Disney+', 'TV Shows Count'], axis = 1)
 
prime_video_season_group_high_tvshows = df_season_group_high_tvshows.sort_values(by = 'Prime Video', ascending = False).reset_index()
prime_video_season_group_high_tvshows = prime_video_season_group_high_tvshows.drop(['index'], axis = 1)
 
prime_video_season_group_low_tvshows = df_season_group_high_tvshows.sort_values(by = 'Prime Video', ascending = True).reset_index()
prime_video_season_group_low_tvshows = prime_video_season_group_low_tvshows.drop(['index'], axis = 1)
 
prime_video_season_group_high_tvshows.head(5)
Out[55]:
Seasons Group TV Shows Count Netflix Hulu Prime Video Disney+
0 1 2720 1491 1129 1483 728
1 2 849 338 252 268 40
2 3 542 204 159 188 34
3 4 364 131 129 114 20
4 5 243 92 96 84 5
In [56]:
disney_season_group_tvshows = season_group_data_tvshows[season_group_data_tvshows['Disney+'] !=  0].sort_values(by = 'Disney+', ascending = False).reset_index()
disney_season_group_tvshows = disney_season_group_tvshows.drop(['index', 'Netflix', 'Hulu', 'Prime Video', 'TV Shows Count'], axis = 1)
 
disney_season_group_high_tvshows = df_season_group_high_tvshows.sort_values(by = 'Disney+', ascending = False).reset_index()
disney_season_group_high_tvshows = disney_season_group_high_tvshows.drop(['index'], axis = 1)
 
disney_season_group_low_tvshows = df_season_group_high_tvshows.sort_values(by = 'Disney+', ascending = True).reset_index()
disney_season_group_low_tvshows = disney_season_group_low_tvshows.drop(['index'], axis = 1)
 
disney_season_group_high_tvshows.head(5)
Out[56]:
Seasons Group TV Shows Count Netflix Hulu Prime Video Disney+
0 1 2720 1491 1129 1483 728
1 2 849 338 252 268 40
2 3 542 204 159 188 34
3 4 364 131 129 114 20
4 5 243 92 96 84 5
In [57]:
print(f'''
      The Seasons Group with Highest TV Shows Count Ever Got is '{df_season_group_high_tvshows['Seasons Group'][0]}' : '{df_season_group_high_tvshows['TV Shows Count'].max()}'\n
      The Seasons Group with Lowest TV Shows Count Ever Got is '{df_season_group_low_tvshows['Seasons Group'][0]}' : '{df_season_group_low_tvshows['TV Shows Count'].min()}'\n
      
      The Seasons Group with Highest TV Shows Count on 'Netflix' is '{netflix_season_group_high_tvshows['Seasons Group'][0]}' : '{netflix_season_group_high_tvshows['Netflix'].max()}'\n
      The Seasons Group with Lowest TV Shows Count on 'Netflix' is '{netflix_season_group_low_tvshows['Seasons Group'][0]}' : '{netflix_season_group_low_tvshows['Netflix'].min()}'\n
      
      The Seasons Group with Highest TV Shows Count on 'Hulu' is '{hulu_season_group_high_tvshows['Seasons Group'][0]}' : '{hulu_season_group_high_tvshows['Hulu'].max()}'\n
      The Seasons Group with Lowest TV Shows Count on 'Hulu' is '{hulu_season_group_low_tvshows['Seasons Group'][0]}' : '{hulu_season_group_low_tvshows['Hulu'].min()}'\n
      
      The Seasons Group with Highest TV Shows Count on 'Prime Video' is '{prime_video_season_group_high_tvshows['Seasons Group'][0]}' : '{prime_video_season_group_high_tvshows['Prime Video'].max()}'\n
      The Seasons Group with Lowest TV Shows Count on 'Prime Video' is '{prime_video_season_group_low_tvshows['Seasons Group'][0]}' : '{prime_video_season_group_low_tvshows['Prime Video'].min()}'\n
      
      The Seasons Group with Highest TV Shows Count on 'Disney+' is '{disney_season_group_high_tvshows['Seasons Group'][0]}' : '{disney_season_group_high_tvshows['Disney+'].max()}'\n
      The Seasons Group with Lowest TV Shows Count on 'Disney+' is '{disney_season_group_low_tvshows['Seasons Group'][0]}' : '{disney_season_group_low_tvshows['Disney+'].min()}'\n 
      ''')
      The Seasons Group with Highest TV Shows Count Ever Got is '1' : '2720'

      The Seasons Group with Lowest TV Shows Count Ever Got is '53' : '1'

      
      The Seasons Group with Highest TV Shows Count on 'Netflix' is '1' : '1491'

      The Seasons Group with Lowest TV Shows Count on 'Netflix' is '34' : '0'

      
      The Seasons Group with Highest TV Shows Count on 'Hulu' is '1' : '1129'

      The Seasons Group with Lowest TV Shows Count on 'Hulu' is '40' : '0'

      
      The Seasons Group with Highest TV Shows Count on 'Prime Video' is '1' : '1483'

      The Seasons Group with Lowest TV Shows Count on 'Prime Video' is '53' : '0'

      
      The Seasons Group with Highest TV Shows Count on 'Disney+' is '1' : '728'

      The Seasons Group with Lowest TV Shows Count on 'Disney+' is '24' : '0'
 
      
In [58]:
fig, axes = plt.subplots(2, 2, figsize = (20 , 20))
 
n_ru_ax1 = sns.barplot(x = netflix_season_group_tvshows['Seasons Group'][:10], y = netflix_season_group_tvshows['Netflix'][:10], palette = 'Reds_r', ax = axes[0, 0])
h_ru_ax2 = sns.barplot(x = hulu_season_group_tvshows['Seasons Group'][:10], y = hulu_season_group_tvshows['Hulu'][:10], palette = 'Greens_r', ax = axes[0, 1])
p_ru_ax3 = sns.barplot(x = prime_video_season_group_tvshows['Seasons Group'][:10], y = prime_video_season_group_tvshows['Prime Video'][:10], palette = 'Blues_r', ax = axes[1, 0])
d_ru_ax4 = sns.barplot(x = disney_season_group_tvshows['Seasons Group'][:10], y = disney_season_group_tvshows['Disney+'][:10], palette = 'BuPu_r', ax = axes[1, 1])
 
labels = ['Netflix', 'Hulu', 'Prime Video', 'Disney+']
 
n_ru_ax1.title.set_text(labels[0])
h_ru_ax2.title.set_text(labels[1])
p_ru_ax3.title.set_text(labels[2])
d_ru_ax4.title.set_text(labels[3])
 
plt.show()
In [59]:
plt.figure(figsize = (20, 5))
sns.lineplot(x = season_group_data_tvshows['Seasons Group'], y = season_group_data_tvshows['Netflix'], color = 'red')
sns.lineplot(x = season_group_data_tvshows['Seasons Group'], y = season_group_data_tvshows['Hulu'], color = 'lightgreen')
sns.lineplot(x = season_group_data_tvshows['Seasons Group'], y = season_group_data_tvshows['Prime Video'], color = 'lightblue')
sns.lineplot(x = season_group_data_tvshows['Seasons Group'], y = season_group_data_tvshows['Disney+'], color = 'darkblue')
plt.xlabel('Seasons Group', fontsize = 15)
plt.ylabel('TV Shows Count', fontsize = 15)
plt.show()
In [60]:
print(f'''
      Accross All Platforms Total Count of Seasons Group is '{season_group_data_tvshows['Seasons Group'].unique().shape[0]}'\n
      Total Count of Seasons Group on 'Netflix' is '{netflix_season_group_tvshows['Seasons Group'].unique().shape[0]}'\n
      Total Count of Seasons Group on 'Hulu' is '{hulu_season_group_tvshows['Seasons Group'].unique().shape[0]}'\n
      Total Count of Seasons Group on 'Prime Video' is '{prime_video_season_group_tvshows['Seasons Group'].unique().shape[0]}'\n
      Total Count of Seasons Group on 'Disney+' is '{disney_season_group_tvshows['Seasons Group'].unique().shape[0]}'\n 
      ''')
      Accross All Platforms Total Count of Seasons Group is '48'

      Total Count of Seasons Group on 'Netflix' is '30'

      Total Count of Seasons Group on 'Hulu' is '43'

      Total Count of Seasons Group on 'Prime Video' is '32'

      Total Count of Seasons Group on 'Disney+' is '14'
 
      
In [61]:
fig, axes = plt.subplots(2, 2, figsize = (20 , 20))
 
n_ru_ax1 = sns.lineplot(y = season_group_data_tvshows['Seasons Group'], x = season_group_data_tvshows['Netflix'], color = 'red', ax = axes[0, 0])
h_ru_ax2 = sns.lineplot(y = season_group_data_tvshows['Seasons Group'], x = season_group_data_tvshows['Hulu'], color = 'lightgreen', ax = axes[0, 1])
p_ru_ax3 = sns.lineplot(y = season_group_data_tvshows['Seasons Group'], x = season_group_data_tvshows['Prime Video'], color = 'lightblue', ax = axes[1, 0])
d_ru_ax4 = sns.lineplot(y = season_group_data_tvshows['Seasons Group'], x = season_group_data_tvshows['Disney+'], color = 'darkblue', ax = axes[1, 1])
 
labels = ['Netflix', 'Hulu', 'Prime Video', 'Disney+']
 
n_ru_ax1.title.set_text(labels[0])
h_ru_ax2.title.set_text(labels[1])
p_ru_ax3.title.set_text(labels[2])
d_ru_ax4.title.set_text(labels[3])

plt.show()
In [62]:
fig, axes = plt.subplots(2, 2, figsize = (20 , 20))
 
n_ru_ax1 = sns.barplot(x = season_group_data_tvshows['Seasons Group'][:10], y = season_group_data_tvshows['Netflix'][:10], palette = 'Reds_r', ax = axes[0, 0])
h_ru_ax2 = sns.barplot(x = season_group_data_tvshows['Seasons Group'][:10], y = season_group_data_tvshows['Hulu'][:10], palette = 'Greens_r', ax = axes[0, 1])
p_ru_ax3 = sns.barplot(x = season_group_data_tvshows['Seasons Group'][:10], y = season_group_data_tvshows['Prime Video'][:10], palette = 'Blues_r', ax = axes[1, 0])
d_ru_ax4 = sns.barplot(x = season_group_data_tvshows['Seasons Group'][:10], y = season_group_data_tvshows['Disney+'][:10], palette = 'BuPu_r', ax = axes[1, 1])
 
labels = ['Netflix', 'Hulu', 'Prime Video', 'Disney+']
 
n_ru_ax1.title.set_text(labels[0])
h_ru_ax2.title.set_text(labels[1])
p_ru_ax3.title.set_text(labels[2])
d_ru_ax4.title.set_text(labels[3])
 
plt.show()